

import numpy as np
def getRegions(genMap, regionSize):
    nLoci = len(genMap)
    # Genetic map is a matrix first column is chr, second column is something else.
    # We want to return a matrix with chromosome, start, stop.
    regions = []
    chrom = genMap[0]
    numMarkers = 1
    start = 0
    for i in range(nLoci):

        if chrom != genMap[i]:
            regions.append((chrom, start, i))
            start = i
            chrom = genMap[i]
        elif i - start >= regionSize:
            regions.append((chrom, start, i))
            start = i
    regions.append((chrom, start, nLoci))

    return np.array(regions, dtype = np.int64)

def getEffects(haplotypes, regions, qtls):
    effects = np.full((regions.shape[0], qtls.shape[0]), 0, dtype = np.float32)
    for i, region in enumerate(regions):
        chrom, start, stop = region
        effects[i,:] = np.sum(haplotypes[None, start:stop]*qtls[:, start:stop], 1)
    return effects

def imputeIndividualFromPhenotypes(ind, sire, dam, phenotypes, qtls, chrMap):
    nLoci = len(chrMap)
    regions = getRegions(chrMap, 1)

    patEffects = (getEffects(sire.haplotypes[0], regions, qtls), getEffects(sire.haplotypes[1], regions, qtls)) 
    matEffects = (getEffects(dam.haplotypes[0], regions, qtls), getEffects(dam.haplotypes[1], regions, qtls))

    patAssign, matAssign = fitAssignments(patEffects, matEffects, phenotypes, regions)
    ind.dosages = np.full(nLoci, 0, dtype = np.float32)
    for i, region in enumerate(regions):
        chrom, start, stop = region
        ind.dosages[start:stop] += (1-patAssign[i])*sire.haplotypes[0][start:stop]
        ind.dosages[start:stop] += patAssign[i]*sire.haplotypes[1][start:stop]

        ind.dosages[start:stop] += (1-matAssign[i])*dam.haplotypes[0][start:stop]
        ind.dosages[start:stop] += matAssign[i]*dam.haplotypes[1][start:stop]

def expNorm(vect):
    vect -= np.max(vect)
    tmp = np.exp(vect)
    return tmp/np.sum(tmp)

def fitAssignments(patEffects, matEffects, phenotypes, regions):
    nSamples = 10
    nRegions = regions.shape[0]

    sampleWeights = np.full(nSamples, 0, dtype = np.float32)
    patSamples = np.full((nSamples, nRegions), 0, dtype = np.float32)
    matSamples = np.full((nSamples, nRegions), 0, dtype = np.float32)
    for i in range(nSamples):
        sampleWeights[i], patSamples[i,:], matSamples[i,:] = sample(patEffects, matEffects, phenotypes, regions)

    weights = expNorm(sampleWeights)

    patOutput = np.full(nRegions, 0, dtype = np.float32)
    matOutput = np.full(nRegions, 0, dtype = np.float32)
    
    print(weights)
    print(patSamples)
    print(matSamples)
    
    for i in range(nSamples):
        patOutput += weights[i] * patSamples[i,:]
    for i in range(nSamples):
        matOutput += weights[i] * matSamples[i,:]

    return patOutput, matOutput

def sample(patEffects, matEffects, phenotypes, regions) :
    nRegions = regions.shape[0]
    matAssign = np.random.binomial(1, .5, size = nRegions)
    patAssign = np.random.binomial(1, .5, size = nRegions)
    score = scoreAssignment(matAssign, patAssign, patEffects, matEffects, phenotypes, regions)
    return (score, matAssign, patAssign)

def scoreAssignment(matAssign, patAssign, patEffects, matEffects, phenotypes, regions, recRate = .1):
    nRegions = len(matAssign)
    nTraits = patEffects[0].shape[1]
    # Get predicted phenotype.

    phenotype_pred = np.full(nTraits, 0, dtype = np.float32)
    for i in range(nRegions):
        phenotype_pred += matEffects[matAssign[i]][i,:]
        phenotype_pred += patEffects[patAssign[i]][i,:]

    distance = np.sum((phenotypes - phenotype_pred)**2)

    nRec = 0
    assignment = matAssign[0]
    chrom = regions[0, 0]
    for i in range(1, nRegions):
        if regions[i,0] != chrom:
            chrom = regions[i,0]
            assignment = matAssign[i]
        if assignment != matAssign[i]:
            nRec += 1
            assignment = matAssign[i]

    assignment = patAssign[0]
    chrom = regions[0, 0]
    for i in range(1, nRegions):
        if regions[i,0] != chrom:
            chrom = regions[i,0]
            assignment = patAssign[i]
        if assignment != patAssign[i]:
            nRec += 1
            assignment = patAssign[i]

    return -distance + nRec*np.log(recRate) + (nRegions - nRec)*np.log(1-recRate)







